********************************************************************************
*********** This program is written for the present value project **************
********************************************************************************
* 01. Construction of Annual Data 
********************************************************************************

 

********************************************************************************
* 01.00  Initialize STATA
********************************************************************************
* Start from an empty Stata session (data, labels, programs, etc. are dropped)
clear all
* Do not pause long output; "perm" stores the setting as a permanent preference
set more off, perm

*********************************************************
* Settings for the intangible capital variable  (01.04) *
*********************************************************
* NOTE(review): none of the four locals below is referenced in the part of the
* do-file reviewed here; presumably a later intangible-capital section uses
* them -- confirm before changing or removing. Being locals, they exist only
* for the duration of this do-file run.
* Deflate intangible to real series
* (0/1 switch; presumably 1 = deflate and 0 = keep nominal -- TODO confirm)
local real = 0

* Parameters
local delta = 0.2			/* Depreciation rate of organization capital */
local delta_xrd = 0.15		/* Depreciation rate of knowledge capital for others */
local g = 0.1				/* Growth rate of XSGA expenditures */
*********************************************************




********************************************************************************
* 01.01  Monthly FF and ten FF industries
********************************************************************************
* Fama-French monthly research factors. Variable names are read from row 4 of
* the CSV and observations from rows 5-1154, so the row range has to be
* extended whenever a longer download replaces the file.
import delimited "~/Dropbox/Research/04 - Present Value Identity/Data/ff/Downloaded/F-F_Research_Data_Factors.csv", ///
	varnames(4) rowrange(5:1154) clear 
	
	* Time identifier: v1 is split into floor(v1/100) (year) and the remainder
	* (month), i.e. v1 is treated as a YYYYMM number
	gen t = ym(floor(v1/100),v1-100*floor(v1/100))
		format t %tm
	keep t mktrf-rf
	* Original output location (relative to the current working directory)
	save "Data/Created/ff research data factors", replace
	* FIX: section 01.02 merges the risk-free rate from the Data/ff/Created
	* folder, not from Data/Created. Write the file there as well so that the
	* merge always sees the factors produced by this run.
	save "~/Dropbox/Research/04 - Present Value Identity/Data/ff/Created/ff research data factors", replace

	// FF 10 industries 
	// Builds a lookup table (index, industry number, industry label, first and
	// last SIC code of each range) from the Siccodes spreadsheet.
	// Paths use forward slashes throughout: Stata accepts them on every
	// platform, and they cannot be mistaken for a macro escape.
	foreach ind in 10 {
		import excel "~/Dropbox/Research/04 - Present Value Identity/Data/ff/Downloaded/Siccodes`ind'.xlsx", clear
		// Separate industry number and label
			gen indnum = substr(A,1,2)
			gen ind = substr(A,3,10)
			destring indnum, force replace
			// Rows without their own number/label inherit the previous row's
			replace indnum = indnum[_n-1] if indnum==.
			replace ind = ind[_n-1] if ind=="" 
		// Separate SIC ranges
			gen sic_beg = substr(B,1,4)
			gen sic_end = substr(B,6,4)
			destring sic*, force replace
			drop if B=="" | sic_beg==.
		// If number consecutive, merge rows
			// A range that starts right after the previous range of the same
			// industry takes over that range's start; the earlier row then
			// has the same sic_beg as its successor and is dropped.
			replace sic_beg = sic_beg[_n-1] if sic_beg == sic_end[_n-1] + 1 ///
												& ind == ind[_n-1]
			drop if sic_beg == sic_beg[_n+1]
		// Generate index and save permanently
			gen index = _n
			local vars = "index indnum-sic_end"
			keep `vars'
			order `vars'
			// Original output location (relative to the working directory)
			save "Data/Created/Siccodes`ind'", replace
			// FIX: the industry merge at the end of this do-file reads
			// Siccodes10 from the Data/ff/Created folder, so save it there too.
			save "~/Dropbox/Research/04 - Present Value Identity/Data/ff/Created/Siccodes`ind'", replace
	}

	
********************************************************************************
* 01.02  Monthly CRSP
********************************************************************************
* Load CRSP
* Purpose of this block: read the monthly CRSP file, keep common shares on
* exchanges 1-3, collapse to one row per permno-month, fold delisting prices
* and returns into prc/ret, flag delistings, build the Pontiff-Woodgate
* issuance measure and store the result in the tempfile crspm.
	use PERMNO date EXCHCD DLSTCD DLRET DLPRC PRC RET SHROUT CFACPR CFACSHR ///
		DIVAMT SHRCD VOL SICCD using "~/Dropbox/Research/04 - Present Value Identity/Data/crsp/Downloaded/all crsp monthly 1926-2022", clear			

	* lowercase
	* NOTE(review): the range PERMNO-CFACSHR follows the variable order of the
	* stored dataset, not the order of the list above. The code below uses
	* divamt, shrcd, vol and siccd in lowercase, so those variables must fall
	* inside that range in the file -- confirm against the CRSP extract.
	foreach var of varlist PERMNO-CFACSHR {
		rename `var', lower
	}	
	
	* Keep common shares from the 3 exchanges, but fill in missing exchcd first
	* (an exchcd of 0 is replaced by the previous month's non-zero code)
	bys permno (date): replace exchcd = exchcd[_n-1] if exchcd==0 & exchcd[_n-1]!=0
	keep if inlist(exchcd,1,2,3) & inlist(shrcd,10,11)
	
	* Time identifier
	gen t = mofd(date)
	format t %tm
	gen month = month(date)
	
	* Add up different distributions (divamt not used directly later but used to 
	* infer price vs dividend split at delisting) 
	* Missing or negative amounts count as zero; after summing within the
	* permno-month only the last row of each permno-month is kept.
	replace divamt = 0 if mi(divamt) | divamt < 0
	bys permno t: egen divsum = sum(divamt)
	bys permno t: keep if _n==_N
		replace divamt = divsum
		drop divsum
		
	* Treat bid-ask as price 
	replace prc = abs(prc) 
	* If dlprc not missing and is not the same as prc (permno 10301), add its absolute value to prc
	replace prc = cond(mi(prc),abs(dlprc),prc+abs(dlprc)) if ~mi(dlprc) & prc!=abs(dlprc)
	
	* Adjust ret for dlstcd if dlret not missing (hence not a Shumway adjustment yet)
	* dlret is floored at -1; a missing ret is replaced by dlret, otherwise the
	* two returns are compounded.
	replace dlret = -1 if dlret < -1
	replace ret = cond(mi(ret),dlret,(1+ret)*(1+dlret)-1) if ~mi(dlret) 
	
	* Adjust ret for dlstcd if bankruptcy but dlret missing (Shumway adjustment; need to adjust for ret and prc)
	* failret is -35% on exchanges 1 and 2 and -55% otherwise, for delisting
	* codes 500 and 520-584.
	* NOTE(review): unlike the dlret case above, a missing ret (or prc) stays
	* missing here because the formula multiplies by the missing value --
	* confirm this asymmetry is intended.
	gen failret = cond(inlist(exchcd,1,2),-.35,-.55) if mi(dlret) & ///
		(dlstcd==500|(dlstcd>=520&dlstcd<=584))
	replace ret = (1+ret)*(1+failret) - 1 if ~mi(failret)
	replace prc = prc * (1+failret) if ~mi(failret) 
	
	* If cfacpr missing due to delisting if shrout same as previous month
	* (a zero factor on a security's last row is carried forward from the
	* previous row when shares outstanding did not change)
	bys permno (t): replace cfacpr = cfacpr[_n-1] if cfacpr==0 & _n == _N & ///
		shrout==shrout[_n-1] 
	bys permno (t): replace cfacshr = cfacshr[_n-1] if cfacshr==0 & _n == _N & ///
		shrout==shrout[_n-1]
	
	* Code as missing if cfacpr==0 or cfacpr < 10^(-5) in all other cases (e.g., permno==92433)
	* (the second condition already covers zero and any negative factor)
	replace cfacpr = . if cfacpr==0 | cfacpr < 10^(-5)
	replace cfacshr = . if cfacshr==0 | cfacshr < 10^(-5)
	
	* Treat all delisting cash flows as dividends (findings robust to treating only
	* delistings due to bankruptcy as dividends, so do the simple thing)
	* One exception: delisting in the last year of observation (dlstcd  = 100)
	* Setting prc to zero makes the whole delisting value show up in the
	* implied dividend computed later from returns and price changes.
	replace prc = 0 if ~mi(dlstcd) & dlstcd!=100
		
	* Remove all entries after a delisting code
	* The code is first carried forward within permno; a row is then dropped
	* when both it and the preceding row carry a code. The drop and the
	* delisting flag below have no by-prefix and rely on the permno-t sort
	* order left by the preceding bys command.
	bys permno (t): replace dlstcd = dlstcd[_n-1] if permno==permno[_n-1] & ~mi(dlstcd[_n-1])
	drop if permno==permno[_n-1] & ~mi(dlstcd) & ~mi(dlstcd[_n-1])
	
	* Code delisting
	* (1 on the last row of each permno unless its dlstcd equals 100)
	gen delisting = cond(permno!=permno[_n+1], 1, 0) & dlstcd!=100
	
	* Code price and return as missing if delisting without a dlstcd
	replace prc = . if delisting==1 & mi(dlstcd)
	replace ret = . if delisting==1 & mi(dlstcd)
	
	* (Nov2021 Edit) Pontiff and Woodgate (2008) - Issuance variable
	* Log ratio of split-adjusted shares 6 months ago to 17 months ago; only
	* defined when both lagged rows are exactly 6 and 17 calendar months back.
	g PW_shrout = cfacshr * shrout
	bys permno (t): g PW_issue = PW_shrout[_n-6]/PW_shrout[_n-17] ///
		if t==t[_n-6]+6 & t==t[_n-17]+17	/* PW (2008) allows a 6-month lag */
	replace PW_issue = ln(PW_issue)			/* PW (2008) use log */
	
	* Temporarily save monthly crsp
	* (reloaded in the Davis-Fama-French section to build December values)
	tempfile crspm
	save "`crspm'"
	
	* Load risk-free rate and merge
	* Adds rf by month t from the Fama-French factor file and divides it by
	* 100 (presumably percent to decimal -- confirm against the source file).
	* Months that exist only in the factor file are dropped.
	merge m:1 t using "~/Dropbox/Research/04 - Present Value Identity/Data/ff/Created/ff research data factors", keepusing(rf)
		replace rf = rf/100
		drop if _merge==2
		drop _merge
		sort permno t

	////////// Security-level quantities

	* Turn bid-ask quotes into price and adjust for splits
	gen P = prc / cfacpr 
	
	* Adjust shares for splits
	gen N = shrout * cfacshr

	* Code missing returns as zero unless it's the firm's first observation
	* FIX: this used to run under "bys permno t:". permno-t is unique at this
	* point, so every by-group held exactly one row, permno[_n-1] was always
	* missing and the replace never changed a single observation. The grouping
	* is now by permno only (ordered by t) and the first row of each security
	* is skipped via _n > 1.
	* The last row of a security that disappears without a delisting code had
	* its return deliberately set to missing during cleaning; it stays missing.
	replace ret = . if inlist(ret,-66,-77,-88,-99)
	bys permno (t): replace ret = 0 if mi(ret) & _n > 1 & ///
		~(delisting==1 & mi(dlstcd))
	
	* Annualize returns																
	gen R = ret 
	gen Rf = rf 

	* Compute backward looking return from previous June 
	* monthsfromjune is the number of months elapsed since the last June
	* (1 for July, ..., 12 for June). R and Rf compound the monthly values of
	* the current month and the monthsfromjune-1 preceding months, using a lag
	* only when it is exactly that many calendar months back.
	gen monthsfromjune = cond(month >= 7, month-6, month+6)
	forval i = 1/11 {
		qui bys permno (t): replace R = (1+R)*(1+ret[_n-`i'])-1 if ///
			t==t[_n-`i']+`i' & `i' <= monthsfromjune - 1
		qui bys permno (t): replace Rf = (1+Rf)*(1+rf[_n-`i'])-1 ///
			if t==t[_n-`i']+`i' & `i' <= monthsfromjune - 1
	}
	* Ensure that cumulative returns are kept only when there is full data from
	* June: the row monthsfromjune positions back must be a June row of the
	* same security. No by-prefix here; the permno-t order left by the loop
	* above is relied upon.
	replace R = . if month[_n-monthsfromjune]!=6 | ///
							permno!=permno[_n-monthsfromjune]
	replace Rf = . if month[_n-monthsfromjune]!=6 | ///
							permno!=permno[_n-monthsfromjune]

	* Trading volume
	* (previous month's volume, only if the previous row is the previous month)
	bys permno (t): g vol_L1 = vol[_n-1] if t==t[_n-1]+1					

	* At least one return observation each year over the last 5 years
	* Iret stays 1 only if each of the five 12-month windows ending 1-12,
	* 13-24, ..., 49-60 months ago contains a non-missing return.
	g Iret = 1
	forvalues i = 0/4 {
		gen ret_valid = 0
		forvalues j = 1/12 {
			local k = 12 * `i' + `j'
			qui bys permno (t): replace ret_valid = 1 if ~mi(ret[_n-`k']) & ///
				t==t[_n-`k']+`k'
		}
		qui replace Iret = Iret * ret_valid
		drop ret_valid
	} 

	* Momentum (excluding last month return)
	* Product of gross returns over lags 1 to 11; the current month's return
	* is not included. Requires a row exactly 12 months back and non-missing
	* prices at lags 1 and 12.
	bys permno (t): g mom = 1 if t==t[_n-12]+12 & ~mi(P[_n-1]) & ~mi(P[_n-12])
	forval i = 1/11 {
		bys permno (t): replace mom = mom*(1+ret[_n-`i']) if ///	
			~mi(ret[_n-`i']) & t==t[_n-`i']+`i'
	}

	* Compute implied dividend from price changes and returns
	bys permno (t): gen D = (1 + R) * P[_n-monthsfromjune] - P if ///
		t <= t[_n-1] + 12

	* Filter out rounding errors in cfacpr (up to 0.2% rounding error in cfacpr)
	* Negative implied dividends are set to zero as well.
	qui bys permno (t): replace D = 0 if abs(D/P[_n-monthsfromjune]) < .002 ///
		| D < 0 
											 
/*
* Dividend error
bys permno: gen DY = D/P[_n-1]
bys permno: egen mean_DY = mean(DY)
summarize mean_DY, detail
keep if ~mi(mean_D) & (mean_D < 0 | mean_D > 1)

*/

/*
* Return error
bys permno (t): gen R_err = (D + P) / P[_n-monthsfromjune] - (1 + R)
summarize R_err, detail
*/
 
	* Market value, its value at the previous June, total dividends paid on
	* the shares held since then, and the share-dilution factor
	generate ME = P * N
	sort permno t
	by permno: generate ME_L1 = ME[_n-monthsfromjune]
	by permno: generate Dtotal = D * N[_n-monthsfromjune]
	by permno: generate N_dilute = N[_n-monthsfromjune]/N

	* Helpers for aligning accounting data with each possible fiscal year-end
	* month (1-12).
	* Example with a December year-end (12): the offset is positive when the
	* fiscal year-end comes after the current month (Jul-Nov) and negative
	* when it comes before it (Jan-Jun).
	* The annual return is taken to realize in June of year t even when a
	* delisting happens, say, in July of year t-1; such a delisting is
	* therefore matched with Compustat values for December of year t-1.
	forvalues m = 1/12 {
		quietly {
			generate monthsfrom`m' = cond(month >= 7, `m' - month, `m' - month -12)
			by permno: generate ME_L1_`m' = ME[_n+monthsfrom`m']
			by permno: generate N_dilute_`m' = N[_n+monthsfrom`m']/N
		}
	}

	* Net issuance between July t-1 and June t
	by permno: generate issue = ME - ME[_n-monthsfromjune]*(1+R) + Dtotal

/*
* Security-level market equity error
bys permno (t): gen R_err2 = (Dtotal + N_dilute * ME) / ME[_n-monthsfromjune] - (1+R) if ///
	t <= t[_n-monthsfromjune] + 12
summarize R_err2, detail
*/

	* CRSP year: a delisting in Jul-Dec of calendar year y-1 is coded as y
	generate year = cond(month >= 7, year(date)+1, year(date))

	* Retain the June row of every year (covering July t-1 to June t) plus
	* any delisting month
	keep if month(date)==6 | delisting==1

	* Retain only what later sections need
	keep permno year t delisting date ME ME_L1 Dtotal ME_L1_* N_dilute_* ///
		issue monthsfromjune N_dilute R Rf Iret mom N exchcd vol_L1 siccd PW_issue

	* New issues: where the dilution factor (or total dividend) is missing on
	* a security's first retained row, set it to zero
	bysort permno (year): generate first_obs = 1 if permno[_n-1]!=permno & !missing(ME)
	replace N_dilute = 0 if first_obs==1 & missing(N_dilute)
	replace Dtotal = 0 if first_obs==1 & missing(Dtotal)
	drop first_obs
	forvalues m = 1/12 {
		bysort permno (year): generate first_obs = 1 if permno[_n-1]!=permno & !missing(ME_L1_`m')
		replace N_dilute_`m' = 0 if first_obs==1 & missing(N_dilute_`m')
		drop first_obs
	}

	* Park the annual CRSP panel in a tempfile
	tempfile crsp
	save "`crsp'"
	


********************************************************************************
* 01.03  Annual Compustat
********************************************************************************
* First, clear Compustat and merge back to CRSP
* Purpose of this block: load the CCM annual file, keep 12-month fiscal
* periods, map each fiscal year to the year used for merging with CRSP, build
* book equity, profitability and net-issuance variables, and store the result
* in the tempfile ccm.
use GVKEY LPERMNO ajex pddur fyr fyear txditc capx at lt ceq seq pstkrv pstkl pstk ni ///
	dltt csho prcc_f sale ch revt cogs xsga xint using ///
	"~/Dropbox/Research/04 - Present Value Identity/Data/compustat/Downloaded/all ccm annual 1950-2021", clear

	* Lowercase
	rename LPERMNO permno
	rename GVKEY gvkey

	* Keep if the operation is 12-month
	keep if pddur == 12
	
/*	* Keep if fyr is 12
	keep if fyr==12
*/	
	* Convert fiscal year of calendar year and keep the latest fiscal year-end observation
	* Fiscal years ending in months 1-5 are assigned to fyear + 2, all others
	* to fyear + 1. Within a permno-year the row with the largest fyr is kept.
	* NOTE(review): the fill of missing fyr uses "bys permno:" without a sort
	* order inside permno, so which row counts as "previous" is not pinned
	* down -- confirm whether (fyear) ordering was intended.
	bys permno: replace fyr = fyr[_n-1] if mi(fyr)
	g year = cond(fyr <= 5, fyear + 2, fyear + 1)
	bys permno year (fyr): keep if _n == _N
	
	* Compute book equity
	* seq falls back to at - lt; preferred stock falls back from pstkrv to
	* pstkl to pstk to zero; missing txditc counts as zero.
	replace txditc = 0 if mi(txditc)
	replace seq = at - lt if mi(seq)

	g preferred = pstkrv
	replace preferred = pstkl if preferred==.
	replace preferred = pstk if preferred==.
	replace preferred = 0 if preferred==.

	g BE_L1_fyr = seq+txditc-preferred 
	bys permno (year): gen BE_L2_fyr = BE_L1_fyr[_n-1] if year==year[_n-1]+1

	* Market equity from Compustat
	gen ME_L1_fyr_CCM = csho * prcc_f 
	
	* Operating profit
	* Missing cost items are set to zero only when at least one of the three
	* is reported; if all three are missing they stay missing.
	recode cogs xint xsga (.=0) if cogs <. | xint <. | xsga <.
	gen OProf_L1_fyr = revt - cogs - xsga - xint

	* Gross profitability
	* (gross profit scaled by the previous year's total assets)
	bys permno (year): gen GP_L1_fyr = (revt - cogs)/at[_n-1] if year==year[_n-1]+1

	
	* Greenwood-Hanson issuer-repurchaser spread
	* Compute net issuance for Greenwood-Hanson (DL EDIT)
// 	bys gvkey (year): gen NS_L1 = log(csho[_n-1]*ajex[_n-1]) - log(csho[_n-2]*ajex[_n-2]) if year==year[_n-1]+1 & year==year[_n-2]+2
// 	gen GHissuer = 1 if NS_L1 > 0.1
// 	gen GHrep = 1 if NS_L1 < -0.005
	* NS is the one-year log change in csho*ajex. A missing NS also satisfies
	* "NS > 0.1" in Stata, so GHissuer is 1 where NS is missing as well.
	* NOTE(review): rows are unique by permno-year here, not by gvkey-year, so
	* a gvkey linked to several permnos can have repeated years in this
	* by-group -- confirm the lag picks the intended row.
	bys gvkey (year): gen NS = log(csho*ajex) - log(csho[_n-1]*ajex[_n-1]) if year==year[_n-1]+1
	gen GHissuer = 1 if NS > 0.1
	gen GHrep = 1 if NS < -0.005
	
/*	* NYSE GH issuance deciles
	forvalues j = 10(10)90 {
		gsort year exchcd
		by year: egen p`j' = pctile(NS_L1) if exchcd==1, p(`j')
*		replace p`j' = p`j'[_n-1] if year==year[_n-1] & mi(p`j')
		}
	qui g bin_NS_L1 = 1 if NS_L1 <= p10 & ~mi(NS_L1)
	forvalues i = 2/9 {
		local j = 10*`i'
		qui replace bin_NS_L1 = `i' if NS_L1 < p`j' & ~mi(NS_L1) & mi(bin_NS_L1)
		}
	qui replace bin_NS_L1 = 10 if (NS_L1 >= p90) & ~mi(NS_L1) & mi(bin_NS_L1)
	drop p10-p90
*/	
/*		
	* Check that strange net income doesn't occur predominantly at delisting
	bys permno (year): gen ni_error = cond(ni < -BE_L2_fyr | ///
		BE_L1_fyr <= 0 | BE_L2_fyr <= 0, 1, 0)
	bys permno (year): gen delist = cond(_n == _N, 1, 0)
	summarize ni_error, detail
	summarize ni_error if delist==0, detail
	summarize ni_error if delist==1, detail
	* The error occurrence seems random, unrelated to delisting
*/ 
	* Net income (Vuolteenaho): don't let firms lose more than their book equity
		* Replacing ni with -BE_L2_fyr without changing BE_L1_fyr violates the
		* exact nonlinear identity, so just drop these observations 
	* (the observation itself is kept; only ni is set to missing)
	replace ni = . if ni < -BE_L2_fyr | BE_L1_fyr <= 0 | BE_L2_fyr <= 0 
 
	* Temporarily save CCM
	tempfile ccm
	save "`ccm'"
	


********************************************************************************
* 01.04  Annual Davis-Fama-French
********************************************************************************
import excel "~/Dropbox/Research/04 - Present Value Identity/Data/davis fama french/Downloaded/DFF_BE_With_Nonindust.xlsx", ///
	clear
	// Name the identifier columns, then label each book-equity column with
	// its year (first data column = 1926) and blank out the -99.99 code
	rename (A B C) (permno begyr endyr)
	local yr = 1926
	foreach col of varlist D-CA {
		replace `col' = . if `col'==-99.99
		rename `col' BE_DFF`yr'
		local ++yr
	}
	// One row per permno-year
	reshape long BE_DFF, i(permno begyr endyr) j(year)
	rename BE_DFF BE_L1_fyr_DFF
	bysort permno (year): generate BE_L2_fyr_DFF = BE_L1_fyr_DFF[_n-1] if year==year[_n-1]+1
	drop begyr endyr
	// A value dated 1926 is available by 1926m6 (French's website), so the
	// merge year needs no additional lag
	generate fyear = year -1
	generate myear = year
	drop year
	// Set the DFF book equity aside and build issuance-adjusted inputs from
	// monthly CRSP (most fiscal years end in December)
	tempfile DFF
	save "`DFF'"
	use "`crspm'", clear
		// Condition selecting December observations
		local dec "month(dofm(t))==12"
		// Total dividend of the month: per-share amount times the previous
		// month's shares outstanding (the dividend accrues to those shares)
		bysort permno (t): generate Divamt = divamt * shrout[_n-1] * 10^(-3) ///
			if t==t[_n-1]+1
		// Sum of dividends over the 12 months ending in December
		generate Divamt12m = Divamt if `dec'
			forvalues lag = 1/11 {
				bysort permno (t): replace Divamt12m = Divamt12m + ///
					Divamt[_n-`lag'] if `dec' & t==t[_n-`lag']+`lag'
			}
		// Market value and its value twelve months earlier
		bysort permno (t): generate MEdec = prc * shrout * 10^(-3)
		bysort permno (t): generate MEdec_L12m = MEdec[_n-12] if t==t[_n-12]+12
		// Compounded return over the 12 months ending in December
		generate R12m = 1 + ret if `dec'
			forvalues lag = 1/11 {
				bysort permno (t): replace R12m = R12m * (1 + ret[_n-`lag']) ///
					if `dec' & t==t[_n-`lag']+`lag'
			}
		replace R12m = R12m - 1
		// Stand-ins for the Compustat items csho and prcc_f
		generate csho_DFF = shrout / 1000
		generate prcc_f_DFF = abs(prc)
		// Keep December rows, date them to the following year and attach
		// the DFF book equity
		keep if `dec'
		generate myear = year(dofm(t))+1
		keep permno myear Divamt12m MEdec* R12m csho_DFF prcc_f_DFF
		merge 1:1 permno myear using "`DFF'"
		sort permno myear
	// Net income implied by book equity, market values and dividends;
	// blanked when the implied loss exceeds lagged book equity
	bysort permno (myear): generate ni_DFF = ((1+R12m)*MEdec_L12m - Divamt12m) ///
		/ MEdec * BE_L1_fyr_DFF - BE_L2_fyr_DFF + Divamt12m
	bysort permno (myear): replace ni_DFF = . if ni_DFF < -BE_L2_fyr_DFF ///
		& myear==myear[_n-1]+1
	// Fiscal year-end month (December) and fiscal year
	generate fyr_DFF = 12
	generate fyear_DFF = fyear
	* Market equity laid out like the Compustat-based measure
	generate ME_L1_fyr_CCM_DFF = csho_DFF * prcc_f_DFF
	// Retain rows that carry book equity or implied net income
	keep BE_L1_fyr_DFF ni_DFF fyr_DFF fyear_DFF ME_L1_fyr_CCM_DFF permno myear
	keep if !missing(BE_L1_fyr_DFF) | !missing(ni_DFF)
	rename myear year

	* Park the DFF panel in a tempfile
	tempfile dff
	save "`dff'"
	


********************************************************************************
* 01.05  Merge all
********************************************************************************
* Purpose of this block: combine the CCM, DFF and CRSP panels on permno-year,
* back-fill gvkey links, aggregate share classes to the firm (gvkey) level and
* remove firms whose dilution factor cannot be reconciled with market equity.
use "`ccm'", clear

	* Merge with DFF data
	* Rows that exist only in DFF get a synthetic string gvkey ("D" + permno)
	* and the DFF fiscal year-end month.
	merge 1:1 permno year using "`dff'", nogen
		gen gvkey_DFF = permno
		tostring gvkey_DFF, force replace
		replace gvkey = "D" + gvkey_DFF if mi(gvkey)
		replace fyr = fyr_DFF if mi(fyr)

	* Merge with CRSP data 
	merge 1:1 permno year using "`crsp'", nogen	

	* Fill in missing gvkey (missing gvkey or missing fyr) with future CCM links
	* Data are ordered by permno and descending year, so the "previous" row is
	* the following year. NOTE(review): this relies on "bys permno:" leaving
	* the descending-year order from gsort untouched -- confirm.
	gsort permno -year
		bys permno: replace gvkey = gvkey[_n-1] if (mi(gvkey) & ~mi(gvkey[_n-1])) ///
			| (mi(at) & ~mi(gvkey[_n-1]))
		tempfile tmp
		save "`tmp'"
		// CCM unique
		// (one row per gvkey-year; collapse without a statistic takes means.
		// The variable ranges follow the column order of the ccm tempfile.)
			use "`ccm'", clear 
			collapse fyear-prcc_f preferred-OProf_L1_fyr, by(gvkey year)
			tempfile ccmunique
			save "`ccmunique'"
		// Rows with a gvkey but no fiscal year-end from either source: keep
		// their CRSP fields and attach accounting data by gvkey-year
		use "`tmp'", clear
		keep if mi(fyr) & mi(fyr_DFF)
		drop if mi(gvkey)
		keep gvkey year permno sale date-issue
		merge m:1 gvkey year using "`ccmunique'"
			drop if _merge==2
			drop _merge
		tempfile tmp2
		save "`tmp2'"
		// Swap those rows in the full panel for the re-merged versions
		use "`tmp'", clear
			drop if mi(fyr) & mi(fyr_DFF) & ~mi(gvkey)
			append using "`tmp2'"
			
	* Drop if gvkey missing
	drop if mi(gvkey)
	sort gvkey year
	order gvkey year
	
	* Save the gvkey-permno match
	* (the full panel is parked in a tempfile first; the reload below has no
	* clear option, which works because the data were just saved)
	tempfile maindata
	save "`maindata'"
		keep gvkey permno year
		save "Data/Created/gvkey_permno", replace
	use "`maindata'"	
		
	
	* Compute N_dilute_fyr
	* (pick the dilution factor and lagged market equity that correspond to
	* the firm's own fiscal year-end month)
	gen N_dilute_fyr = .
	gen ME_L1_fyr = .
	forvalues fyr = 1/12 {
		qui replace N_dilute_fyr = N_dilute_`fyr' if fyr==`fyr'
		qui replace ME_L1_fyr = ME_L1_`fyr' if fyr==`fyr'
	}
	
	* Scale ME's and Dfirm from CRSP to be the same as Compustat
	* (divide by 1000; values are also blanked when R is missing)
	foreach var in ME ME_L1_fyr ME_L1 Dtotal {
		replace `var' = `var' / 10^3
		replace `var' = . if mi(R)
	}

	* Keep multiple issue cases to study
	* Nsclass = number of rows (share classes) per gvkey-year
	gen sclass = 1 
	bys gvkey year: egen Nsclass = sum(sclass) 
		drop sclass 
	sort gvkey year permno
	order gvkey year
	* Collapse by firm and adjust N_dilute if necessary 
	* Each weighted average is built from a numerator (weight times value) and
	* a denominator (weight where the value is non-missing), summed over share
	* classes and divided after the collapse.
	gen ME_L1_times_R = ME_L1 * R
	gen ME_L1_R = ME_L1 if ~mi(R)
	* ME weighted dilution
	gen ME_times_N_dilute = ME * N_dilute
	gen ME_N_dilute = ME if ~mi(N_dilute)
	* ME weighted N_dilute_fyr
	gen ME_times_N_dilute_fyr = ME * N_dilute_fyr
	gen ME_N_dilute_fyr = ME if ~mi(N_dilute_fyr)
	* (Nov2021): ME_L1 weighted PW Issuance
	gen ME_L1_PW_issue = ME_L1 if ~mi(PW_issue)
	gen ME_L1_times_PW_issue = ME_L1 * PW_issue
	collapse (sum) ME_L1_times_R ME_L1_R ME_times_N_dilute ME_N_dilute ///
		ME_L1 ME_L1_fyr ME D = Dtotal ME_times_N_dilute_fyr ME_N_dilute_fyr ///
		ME_L1_PW_issue ME_L1_times_PW_issue ///
		(mean) mom BE_L1_fyr ni ME_L1_fyr_CCM Rf at sale dltt vol_L1 OProf_L1_fyr ///
		GP_L1_fyr BE_L1_fyr_DFF ni_DFF ME_L1_fyr_CCM_DFF fyr_DFF ///
		(min) Iret exchcd (max) GHissuer GHrep fyr siccd, by(gvkey year Nsclass)
	gen R = ME_L1_times_R / ME_L1_R
	gen N_dilute = ME_times_N_dilute / ME_N_dilute
	gen N_dilute_fyr = ME_times_N_dilute_fyr / ME_N_dilute_fyr
	gen PW_issue = ME_L1_times_PW_issue/ME_L1_PW_issue 		/* Nov2021 */	
	* A (sum) over all-missing inputs yields zero, so a zero total is treated
	* as missing
	replace ME = . if ME==0
	replace ME_N_dilute = . if ME_N_dilute==0
	drop ME_L1
	* When one of the share classes delists in a firm, N_dilute could be off.
	* In this case, adjust N_dilute using N_dilute_implied by market equity dynamics.
	bys gvkey (year): gen N_dilute_implied = ((1+R)*ME[_n-1]-D)/ME
	* If the difference arises for firms with a change in the number of share
	* classes, put in the change (e.g, gvkey 001076, 001097) 
	* (applies when the number of share classes fell by exactly one and the
	* implied factor is non-negative)
	bys gvkey (year): replace N_dilute = N_dilute_implied if ///
		abs(N_dilute - N_dilute_implied) > .1 & Nsclass==Nsclass[_n-1]-1 & ///
		~mi(Nsclass) & ~mi(Nsclass[_n-1]) & ~mi(N_dilute_implied) & N_dilute_implied >= 0		
	* If there is still discrepancy, remove the firm from the data
	* (a single flagged year removes every year of that gvkey)
	bys gvkey (year): gen delete=1 if abs(N_dilute - N_dilute_implied) > .1 & ///
		~(Nsclass==Nsclass[_n-1]-1 & ~mi(Nsclass) & ~mi(Nsclass[_n-1])) & ///
		~mi(N_dilute_implied)
	bys gvkey: egen deletemax = max(delete)
	drop if deletemax==1
		drop deletemax delete

/*
* Firm-level market equity error  
bys gvkey (year): gen R_err3 = (D + N_dilute * ME) / ME[_n-1] - (1+R) if year==year[_n-1]+1
summarize R_err3, detail
*/	 
	* Shorthands reused below: the consecutive-year condition and the factors
	* that carry fiscal-year-end accounting values to the CRSP date
	local consec "year==year[_n-1]+1"
	local toJune "* ME_L1_fyr / ME_L1_fyr_CCM / N_dilute_fyr"
	local toJuneDFF "* ME_L1_fyr / ME_L1_fyr_CCM_DFF / N_dilute_fyr"

	* Gross growth in total assets
	bysort gvkey (year): generate AG = at / at[_n-1] if `consec'

	* Firm-level accounting variables (Compustat)
	generate BE = BE_L1_fyr `toJune'
	bysort gvkey (year): generate BE_L1 = BE[_n-1] if `consec'
	generate Y = ni `toJune'
		/* losses are capped at lagged book equity */
		replace Y = -BE_L1 if Y < -BE_L1 & !missing(BE_L1)
	generate OProf = OProf_L1_fyr `toJune'
	generate GP = GP_L1_fyr `toJune'

	* Firm-level accounting variables (DFF)
	generate BE_DFF = BE_L1_fyr_DFF `toJuneDFF'
	bysort gvkey (year): generate BE_L1_DFF = BE_DFF[_n-1] if `consec'
	generate Y_DFF = ni_DFF `toJuneDFF'
		/* losses are capped at lagged book equity */
		replace Y_DFF = -BE_L1_DFF if Y_DFF < -BE_L1_DFF & !missing(BE_L1_DFF)

	* Negative book equity is treated as missing (Compustat, then DFF)
	foreach be of varlist BE BE_L1 BE_DFF BE_L1_DFF {
		replace `be' = . if `be' < 0
	}

	* Ratios (Compustat)
	generate MB = ME / BE
	generate BM = BE / ME
	bysort gvkey (year): generate MB_L1 = MB[_n-1] if `consec'
	generate ROE = Y / BE_L1
	generate SCALE = (D + N_dilute * BE) / (BE_L1 + Y)
	generate LAMBDA = N_dilute * BE / (D + N_dilute * BE)
	generate GAMMA = D / (D + N_dilute * BE)
	generate OP = OProf / BE

	* Ratios (DFF)
	generate MB_DFF = ME / BE_DFF
	bysort gvkey (year): generate MB_L1_DFF = MB_DFF[_n-1] if `consec'
	generate ROE_DFF = Y_DFF / BE_L1_DFF
	generate SCALE_DFF = (D + N_dilute * BE_DFF) / (BE_L1_DFF + Y_DFF)
	generate LAMBDA_DFF = N_dilute * BE_DFF / (D + N_dilute * BE_DFF)
	generate GAMMA_DFF = D / (D + N_dilute * BE_DFF)
 
/*
* Exact nonlinear identity error
gen R_err4 = (1+ROE) * SCALE * (1 + (MB - 1) * LAMBDA) / MB_L1 - (1+R)
summarize R_err4, detail

* Exact nonlinear identity error (DFF)
gen R_err5 = (1+ROE_DFF) * SCALE_DFF * (1 + (MB_DFF - 1) * LAMBDA_DFF) / MB_L1_DFF - (1+R)
summarize R_err5, detail
*/
 
/*
* Large GAMMA is due to delisting. Due to popularity of stock repurchases 
* instead of dividends, median Gamma really low.
summarize GAMMA, detail
* Median LAMBDA almost 0.98.
summarize LAMBDA, detail 
*/
	 
 
////////// Composite portfolio values (pre-rebalancing for time-t values)

* Portfolio parameters, stored as constant variables:
* tbw enters as the weight on the risk-free leg, rho splits that leg's payoff
* between the retained part (rho) and the paid-out part (1-rho)
generate tbw = 0.1
generate rho = 0.96

* Shorthands: consecutive-year condition and the retained value of the
* risk-free leg
local consec "year==year[_n-1]+1"
local carry "tbw * rho * (1+Rf) * ME[_n-1]"

* Market-side composites
sort gvkey year
by gvkey: generate MEcomp_L1 = ME[_n-1] if `consec'
generate Rcomp = (1-tbw) * R + tbw * Rf
by gvkey: generate Dcomp = (1-tbw) * D + tbw * (1-rho) * (1+Rf) * ME[_n-1] if `consec'
by gvkey: generate MEcomp = (1-tbw) * ME + `carry' if `consec'

/*
* ME dynamics for the composite portfolio (doesn't have to hold due to dilution
gen R_err6 = (MEcomp + Dcomp) / MEcomp_L1 - (1 + Rcomp)
summarize R_err6, detail
*/

* Accounting-side composites (Compustat)
by gvkey: generate BEcomp_L1 = (1-tbw) * BE_L1 + tbw * ME[_n-1]
by gvkey: generate Ycomp = (1-tbw) * Y + tbw * Rf * ME[_n-1]
by gvkey: generate BEcomp = (1-tbw) * BE + `carry'

* Accounting-side composites (DFF)
by gvkey: generate BEcomp_L1_DFF = (1-tbw) * BE_L1_DFF + tbw * ME[_n-1]
by gvkey: generate Ycomp_DFF = (1-tbw) * Y_DFF + tbw * Rf * ME[_n-1]
by gvkey: generate BEcomp_DFF = (1-tbw) * BE_DFF + `carry'

* Ratios of the composite portfolio (Compustat)
generate MBcomp_L1 = MEcomp_L1 / BEcomp_L1
generate MBcomp = MEcomp / BEcomp
generate ROEcomp = Ycomp / BEcomp_L1
by gvkey: generate N_dilute_comp = ((1-tbw) * ME * N_dilute + `carry') / ///
					((1-tbw) * ME + `carry')
generate SCALEcomp =  (Dcomp + N_dilute_comp * BEcomp) / (BEcomp_L1 + Ycomp)
generate LAMBDAcomp = N_dilute_comp * BEcomp / (Dcomp + N_dilute_comp * BEcomp)
generate GAMMAcomp = Dcomp / (Dcomp + N_dilute_comp * BEcomp)

* Ratios of the composite portfolio (DFF)
generate MBcomp_L1_DFF = MEcomp_L1 / BEcomp_L1_DFF
generate MBcomp_DFF = MEcomp / BEcomp_DFF
generate ROEcomp_DFF = Ycomp_DFF / BEcomp_L1_DFF
generate SCALEcomp_DFF =  (Dcomp + N_dilute_comp * BEcomp_DFF) / (BEcomp_L1_DFF + Ycomp_DFF)
generate LAMBDAcomp_DFF = N_dilute_comp * BEcomp_DFF / (Dcomp + N_dilute_comp * BEcomp_DFF)
generate GAMMAcomp_DFF = Dcomp / (Dcomp + N_dilute_comp * BEcomp_DFF)
 
/*
* Exact nonlinear identity error for the composite portfolio
gen R_err7 = 1/(1+Rcomp) * (1+ROEcomp) * SCALEcomp * (1 + (MBcomp-1) * LAMBDAcomp) - MBcomp_L1
summarize R_err7, detail
*/
 
* Log versions of the composite portfolio quantities; each DFF counterpart is
* created right after its Compustat-based twin
generate r = log(1 + Rcomp)
generate mb_L1 = log(MBcomp_L1)
	generate mb_L1_DFF = log(MBcomp_L1_DFF)
generate bm_L1 = -mb_L1
	generate bm_L1_DFF = -mb_L1_DFF
generate mb = log(MBcomp)
	generate mb_DFF = log(MBcomp_DFF)
generate bm = -mb
	generate bm_DFF = -mb_DFF
generate roe = log(1 + ROEcomp)
	generate roe_DFF = log(1 + ROEcomp_DFF)
generate scale = log(SCALEcomp)
	generate scale_DFF = log(SCALEcomp_DFF)
* Log market value, taken from the DFF series where the Compustat one is missing
generate m_L1 = mb_L1 + log(BEcomp_L1)
	replace m_L1 = mb_L1_DFF + log(BEcomp_L1_DFF) if missing(m_L1)
generate m = mb + log(BEcomp)
	replace m = mb_DFF + log(BEcomp_DFF) if missing(m)
* Share-adjusted log market value
generate m_adj = log(N_dilute_comp * exp(m))
generate dfirm = log(Dcomp)
generate gamma = log(GAMMAcomp)
	generate gamma_DFF = log(GAMMAcomp_DFF)
generate pva = log(1+(MBcomp-1)*LAMBDAcomp)-rho*mb
	generate pva_DFF = log(1+(MBcomp_DFF-1)*LAMBDAcomp_DFF)-rho*mb_DFF
generate sgrowth = log(1/N_dilute_comp)
generate assetgrowth = log(AG)
generate dp = dfirm - m_adj
generate gp = log(1+GP)

* Where any core Compustat-based series is missing, switch the whole set of
* variables to the DFF values that are available
generate miss = 1 if missing(mb_L1, mb, roe, scale)
foreach v in mb_L1 bm_L1 mb bm roe scale gamma BE BE_L1 Y MB_L1 MB ROE ///
		SCALE GAMMA pva {
	replace `v' = `v'_DFF if miss==1 & !missing(`v'_DFF)
}

* Dividend-price ratio of the composite portfolio (level)
generate DP = exp(dp)

/*
gen lambda = log(LAMBDAcomp)
gen policy = (1-rho)*(gamma-log(1-rho)) + rho*(lambda-log(rho))
gen payout = (1-rho)*(gamma-log(1-rho))
gen plowback = rho*(lambda-log(rho))
gen policy2 = (1-rho)*(gamma-log(1-rho))+rho*(lambda-log(rho)) + rho*(1-rho)*(lambda-gamma+log(1/rho-1))*mb
gen policystar = log(MBcomp^(-rho)*GAMMAcomp + MBcomp^(1-rho)*LAMBDAcomp)
gen netissue = log(sgrowth)
gen assetgrowth = log(agrowth)
gen capex = log(1+capx/at)
gen leverage = log(1+lt/at)
gen cash = ch/at
gen DP = Dcomp / MEcomp_L1
*/ 

* Lagged market equity and sales
bys gvkey (year): gen ME_L1 = ME[_n-1] if year==year[_n-1]+1
bys gvkey (year): gen sale_L1 = sale[_n-1] if year==year[_n-1]+1

* Compute portfolio-level duration, defined as in Equation (2) of Weber (2018, JFE)
* Two variants are built side by side:
*   MW   - return on equity forecast from ROE
*   MWCS - return on equity forecast from (1+ROE)*SCALE-1
* For each variant, PV_CF_* accumulates the discounted forecast cash flows
* over the horizon and t_PV_CF_* accumulates the same terms weighted by the
* year in which they arrive. Duration is the horizon-weighted part plus the
* weight of the remaining value times (horizon + (1+r)/r).
* Parameters for duration Calculation following Weber 2018
local discount_rate = 0.12 // discount rate 
local AR_ROE = 0.4067106129 // AR(1) coefficient of ROE, for terminal value
local ROE_SS = 0.12 // steady-state ROE, for terminal value
local AR_SG = 0.2411083953 // AR(1) coefficient BE equity growth
local SG_SS = 0.06 // steady-state sales growth rate
local horizon = 15

* Year-1 forecasts: growth and profitability revert from their current values
* toward the steady state at the AR(1) rates above
gen s_1 = (1 - `AR_SG') * `SG_SS' + `AR_SG' * log(sale/sale_L1)
gen RonE_1 = (1 - `AR_ROE') * `ROE_SS' + `AR_ROE' * ROE
gen RonECS_1 = (1 - `AR_ROE') * `ROE_SS' + `AR_ROE' * ((1+ROE)*SCALE-1)
gen BV_1 = BE * (1 + s_1)
gen CF_MW_1 = BE * (1 + RonE_1) - BV_1
gen CF_MWCS_1 = BE * (1 + RonECS_1) - BV_1
* Running sums start with the year-1 term (time weight 1)
gen PV_CF_MW = CF_MW_1 / (1 + `discount_rate')
gen PV_CF_MWCS = CF_MWCS_1 / (1 + `discount_rate')
gen t_PV_CF_MW = CF_MW_1 / (1 + `discount_rate')
gen t_PV_CF_MWCS = CF_MWCS_1 / (1 + `discount_rate')

* State carried from one forecast year to the next
gen s_L1 = s_1
gen RonE_L1 = RonE_1
gen RonECS_L1 = RonECS_1
gen BV_L1 = BV_1

forvalues i = 2/`horizon' {
	gen s_`i' = (1 - `AR_SG') * `SG_SS' + `AR_SG' * s_L1
	gen RonE_`i' = (1 - `AR_ROE') * `ROE_SS' + `AR_ROE' * RonE_L1 
	gen RonECS_`i' = (1 - `AR_ROE') * `ROE_SS' + `AR_ROE' * RonECS_L1 
	gen BV_`i' = BV_L1 * (1 + s_`i') 
	gen CF_MW_`i' = BV_L1 * (1 + RonE_`i') - BV_`i' // Clean-surplus dividend: includes repurchases, but treats issuance as negative cash-flow
	gen CF_MWCS_`i' = BV_L1 * (1 + RonECS_`i') - BV_`i' // Clean-surplus dividend: includes repurchases, but treats issuance as negative cash-flow
	* FIX: every running sum now accumulates onto itself. Previously the
	* second line overwrote PV_CF_MW with the MWCS sum (PV_CF_MWCS was never
	* updated past year 1), and the time-weighted sums were rebuilt from the
	* unweighted sums each year, so only the last year's weight survived.
	replace PV_CF_MW = PV_CF_MW + CF_MW_`i' * (1 + `discount_rate')^(-`i')
	replace PV_CF_MWCS = PV_CF_MWCS + CF_MWCS_`i' * (1 + `discount_rate')^(-`i')
	replace t_PV_CF_MW = t_PV_CF_MW + `i' * CF_MW_`i' * (1 + `discount_rate')^(-`i')
	replace t_PV_CF_MWCS = t_PV_CF_MWCS + `i' * CF_MWCS_`i' * (1 + `discount_rate')^(-`i')	
	replace s_L1 = s_`i'
	replace RonE_L1 = RonE_`i'
	replace RonECS_L1 = RonECS_`i'	
	replace BV_L1 = BV_`i'
}

* w_* is the share of market equity explained by the cash flows inside the
* horizon; the remainder is assigned the duration of a level perpetuity that
* starts after the horizon
gen w_MW = PV_CF_MW / ME
gen w_MWCS = PV_CF_MWCS / ME
gen Dur_MW = w_MW * t_PV_CF_MW / PV_CF_MW + (1-w_MW) * (`horizon' + (1 + `discount_rate')/`discount_rate')
gen Dur_MWCS = w_MWCS * t_PV_CF_MWCS / PV_CF_MWCS + (1-w_MWCS) * (`horizon' + (1 + `discount_rate')/`discount_rate')
bys gvkey (year): gen Dur_MW_L1 = Dur_MW[_n-1] if year==year[_n-1]+1
bys gvkey (year): gen Dur_MWCS_L1 = Dur_MWCS[_n-1] if year==year[_n-1]+1

* Save data permanently
sort gvkey year
drop if mi(gvkey)
drop miss
save "Data/Created/Annual Data 2021", replace

/*
* Approximate linear identity error for the composite portfolio
gen R_err8 = -r + roe + scale + rho * mb - mb_L1
summarize R_err8, detail
*/

 
********************************************************************************
* 01.06  Data requirements and characteristic bins
********************************************************************************

use "Data/Created/Annual Data 2021", clear

* Merge 10 FF industry categories
* The SIC-range table is attached through a running row index so that its
* entries can be read with explicit subscripts in the loop below.
* NOTE(review): this assumes index in Siccodes10 equals the table's row number
* and that the table has at most 100 rows - confirm against the code that builds it.
* The path uses forward slashes, which Stata accepts on every platform.
gen index = _n
	merge 1:m index using "~/Dropbox/Research/04 - Present Value Identity/Data/ff/Created/Siccodes10", nogen keepusing(indnum sic*)
	gen ind10 = .
	forvalues i = 1/100 {
		* First matching SIC range wins: rows already classified are skipped
		replace ind10 = indnum[`i'] if siccd >= sic_beg[`i'] & siccd <= sic_end[`i'] & ind10==. & siccd!=.
	}
	* SIC codes outside every listed range fall into category 10
	replace ind10 = 10 if mi(ind10) & ~mi(siccd)
	sort gvkey year

* Fama-French requirement: 1 when both the previous row's ME (within firm) and
* mb_L1 are non-missing, 0 otherwise
bys gvkey (year): g ffdata_L1 = !(mi(ME[_n-1]) | mi(mb_L1))

* Lags 1 to 5 of returns, fundamentals and exchange code. A lag is filled only
* when the row that many positions back within the firm is exactly that many
* years earlier.
local lagvars r roe scale gamma sgrowth assetgrowth mom R ROE SCALE OP exchcd pva
foreach v of local lagvars {
	forvalues k = 1/5 {
		bys gvkey (year): gen `v'_L`k' = `v'[_n-`k'] if year==year[_n-`k']+`k'
	}
}

* Lags 2 to 5 of bm are built from the existing bm_L1, shifted by one year less
forvalues k = 1/4 {
	local lag = `k' + 1
	bys gvkey (year): gen bm_L`lag' = bm_L1[_n-`k'] if year==year[_n-`k']+`k'
}

* Further data requirement: r, mb, roe and scale present both currently and at lag 1
g datareq = !missing(r, mb, roe, scale, r_L1, mb_L1, roe_L1, scale_L1)

/*				
* Vuolteenaho data requirement (Note vuolreq = 1 if requirement met)
g vuolreq = 1
* 1. A firm must have t-1, t-2, t-3 book equity 
capt drop aux
forval i = 1/3 {
	bys gvkey (year): g aux = BE[_n-`i'] if year==year[_n-`i']+`i'
	replace vuolreq=0 if mi(aux)
	drop aux
	}
* 2. A firm must have t-1, t-2 net income 
forval i = 1/2 {
	bys gvkey (year): g aux = ni[_n-`i'] if year==year[_n-`i']+`i'
	replace vuolreq=0 if mi(aux)
	drop aux
	} 
* 3. A firm must have t-1, t-2 long-term debt
forval i = 1/2 {
	bys gvkey (year): g aux = dltt[_n-`i'] if year==year[_n-`i']+`i'
	replace vuolreq=0 if mi(aux)
	drop aux
	} 
* 4. A firm must have t-1, t-2, t-3 market equity 
forval i = 1/3 {
	bys gvkey (year): g aux = ME[_n-`i'] if year==year[_n-`i']+`i'
	replace vuolreq=0 if mi(aux)
	drop aux
	} 
* 5. Exclude firms that have t - 1 market equity less than $10 million 
bys gvkey (year): replace vuolreq=0 if ME[_n-1] < 10 
* 6. Exclude firms whose book-to-market more than 100 or less than 1/100 
replace vuolreq=0 if MB_L1 > 100 & ~mi(MB_L1)
replace vuolreq=0 if MB_L1 < 1/100 & ~mi(MB_L1)
* 7. A valid trade during the month immediately preceding the period t return 
replace vuolreq=0 if mi(vol_L1) | vol_L1==0
* 8. At least one monthly return observation during each of the preceding five years (Unnecessary in our case)
replace vuolreq=0 if Iret==0
* 9. Firm can't lose more than book equity
replace vuolreq=0 if BE_L1 + Y <= 0
* 10. December fiscal year-end 
replace vuolreq=0 if fyr!=12
*/

* NYSE size, book-to-market, roe, asset growth, momentum, scale, profitability deciles
* Breakpoints are the within-year 10th to 90th percentiles computed over rows
* with exchcd==1 and ffdata_L1==1. Every row of the year is then assigned a
* decile from those breakpoints.
* NOTE(review): in a year with no qualifying row the breakpoints are missing;
* because missing compares above every number, all non-missing values of the
* characteristic then land in bin 1.
foreach char of varlist ME_L1 bm_L1 mb_L1 ROE_L1 assetgrowth_L1 mom_L1 SCALE_L1 OP_L1 Dur_MW_L1 Dur_MWCS_L1 bm {
	forvalues j = 10(10)90 {
		tempvar nysepct
		* Percentile over the qualifying rows only (missing on all other rows)
		bysort year: egen `nysepct' = pctile(`char') if exchcd==1 & ffdata_L1==1, p(`j')
		* Copy the breakpoint to every row of the year. Taking the by-year
		* maximum of a within-year constant does not depend on row order, so no
		* row can be left without a breakpoint because of where it sorts.
		qui bysort year: egen p`j' = max(`nysepct')
		drop `nysepct'
	}
	qui g bin_`char' = 1 if `char' <= p10 & ~mi(`char')
	forvalues i = 2/9 {
		local j = 10*`i'
		qui replace bin_`char' = `i' if `char' < p`j' & ~mi(`char') & mi(bin_`char')
	}
	qui replace bin_`char' = 10 if (`char' >= p90) & ~mi(`char') & mi(bin_`char')
	drop p10 p20 p30 p40 p50 p60 p70 p80 p90
}
	
* Coarser sorts derived from the decile bins
foreach x of varlist ME_L1 bm_L1 mb_L1 ROE_L1 assetgrowth_L1 mom_L1 SCALE_L1 OP_L1 Dur_MW_L1 Dur_MWCS_L1 {
	* Quintiles: consecutive deciles are paired (1-2, 3-4, ..., 9-10)
	gen binq_`x' = ceil(bin_`x'/2)
	* Three groups: deciles 1-3, 4-7 and 8-10; missing when the decile is missing
	gen bintr_`x' = 1 + (bin_`x' > 3) + (bin_`x' > 7) if ~mi(bin_`x')
}
 
* Size flag from the NYSE size deciles: 0 for the bottom decile, 1 for every
* other row (including rows whose size decile is missing)
g size_L1 = bin_ME_L1 != 1

* Restrict to the sample period 1926-2022
keep if inrange(year, 1926, 2022)

* Numeric firm identifier, one value per gvkey
egen firm = group(gvkey)

* Store the binned panel ordered by firm and year
sort firm year
save "Data/Created/Annual Data 2021 with Bins", replace

* Greenwood-Hanson issuer-repurchaser spread for book-to-market.
* To cover more characteristics, extend the list below, e.g. with
* ME_L1 bm_L1 ROE_L1 assetgrowth_L1 mom_L1 SCALE_L1 OP_L1 Dur_MW_L1 Dur_MWCS_L1
local GHchars bm

* Interact the issuer and repurchaser variables with the decile bin
foreach c of local GHchars {
	gen GH`c'i = GHissuer*bin_`c'
	gen GH`c'r = GHrep*bin_`c'
}

* Remove the raw variables so that only the interactions match GH* below,
* then average by year
drop GHissuer GHrep
collapse (mean) GH*, by(year)

* Spread: issuer average minus repurchaser average
foreach c of local GHchars {
	gen GH`c' = GH`c'i - GH`c'r
}

save "Data/Created/Annual GH 2021", replace
 
/*
* Drop ME_L1 if not needed
drop ME_L1
*/
/*
* Drop a duplicate (look into it again)
drop if gvkey=="022221" & fyr==12 & year==2003
*/

 
 

********************************************************************************
* 01.04  Estimate Intangible Capital
********************************************************************************

* Load annual CCM

u "~/Dropbox/Research/04 - Present Value Identity/Data/compustat/Downloaded/all ccm annual 1950-2020", clear

* Lowercase

ren LPERMNO permno
ren GVKEY gvkey
	
* Keep if the operation is 12-month

keep if pddur == 12

* Convert fiscal year to calendar year and keep the latest fiscal year-end observation
* A missing fyr is filled from the previous row of the same permno. Rows are
* ordered by fyear within permno so that the fill is deterministic and does
* not depend on the order in which the file happens to be stored.
* Rows with fyr <= 5 are dated fyear + 2, all others (including rows whose fyr
* is still missing) fyear + 1. Within gvkey-year the row with the largest fyr
* is kept; a missing fyr sorts last.

bys permno (fyear): replace fyr = fyr[_n-1] if mi(fyr)
g year = cond(fyr <= 5, fyear + 2, fyear + 1)
bys gvkey year (fyr): keep if _n == _N

* Industry code

destring sic, replace force
destring sich, replace force

replace sic = sich if ~mi(sich)				/* Use historical SIC (sich) whenever it is available; otherwise keep sic */

* Keep relevant variables

keep gvkey year xsga xrd sic

* Tsfill and carryforward
* tsfill inserts a row for every missing year inside each firm's span; the
* identifier and industry code of those new rows are then copied down from
* the preceding year.

egen firm = group(gvkey)
xtset firm year
tsfill


foreach var of varlist gvkey sic{
	* Order by year within firm explicitly: carrying values forward is only
	* meaningful in time order
	bysort firm (year): carryforward `var', replace
	}

* Replace missing or negative values to 0

foreach var of varlist xsga xrd{
	replace `var' = 0 if mi(`var') | `var' < 0
	}	

/* Optionally convert the expenditure series to real terms (runs only when the real switch is 1) */

if `real' == 1 {

	* Set the firm-year panel aside while the CPI sheet is loaded
	tempfile ccm
	save "`ccm'"

	* CPI sheet: the Dec column is used as the price index of each year
	import excel "~/Dropbox/Research/04 - Present Value Identity/Data/intangible/CPI.xlsx", sheet("Cleaned Version") firstrow clear
	rename (Dec Year) (cpi year)

	* Bring the panel back in; years that appear only in the CPI sheet are dropped
	merge 1:m year using "`ccm'", keep(using match) nogen
	sort gvkey year

	* Divide both expenditure series by the price index
	foreach spend of varlist xsga xrd {
		replace `spend' = `spend'/cpi
	}

}

/* Compute intangibles */

* 1. Capitalizing XSGA
* Organization capital by perpetual inventory, following Eisfeldt and Papanikolaou (2013)

sort gvkey year
g intinv = xsga

* Starting stock on each firm's first row: the first-year flow capitalized at
* the growth rate plus the depreciation rate, depreciated once, plus that
* year's flow. All other rows start out missing.
by gvkey (year): g intcap = (1-`delta')*intinv/(`g'+`delta') + intinv if _n==1

* Later rows: depreciated stock of the previous year plus the current flow.
* replace works through the rows in order, so each row sees the value just
* written to the row before it.
by gvkey (year): replace intcap = (1-`delta')*intcap[_n-1] + intinv ///
	if _n>1 & year==year[_n-1]+1

* Investment net of the depreciation on last year's stock

by gvkey (year): replace intinv = intinv - `delta'*intcap[_n-1] ///
	if _n>1 & year==year[_n-1]+1

* Keep the identifiers and the intangible series, then save

keep gvkey year intcap* intinv*
save "Data/Created/intangible", replace
